/*
	Purpose: Using the sample of 1940 Census fathers aged 30-50
	         (from 0b), this file creates father income scores
	         at the preferred occupation x race x south level. 

	Creates: avgincomes_fathers1940_byrace_bysouth.dta
*/
* Session setup: disable output paging and start from an empty dataset.
set more off
clear
cd "$Mydirectory1/1_DataSources/CensusData/"

	* Load the 1940 father sample and add a constant equal to 1 on every row;
	* summing this constant within a cell later gives the cell's observation count.
	use "./input/Census1940_fathers_ages30to50_forIncomeScores.dta", clear
	generate number = 1

**-----------------------------------------------------------------------------
**-----------------------------------------------------------------------------

*******************
*** TEMPLATES
*******************

* Keep one copy of the full micro data on disk. Each template below is built
* by collapsing the data in memory, after which the full data are reloaded
* from this copy. The copy is also reused by the weighted collapse further down.
	tempfile fulldata
	save `fulldata'

* Template 1: occupation x race
* One row per occupation for each of the two race codes.
* NOTE(review): assumes race is coded 1 and 2 in the input file - confirm.
* Race is assigned explicitly rather than derived from the within-occupation
* minimum, so an occupation observed for only one race still gets both rows
* (previously such an occupation produced two identical rows).

	collapse (min) race, by(occ1950ej)
	expand 2
	bysort occ1950ej: replace race = _n
	isid occ1950ej race

	tempfile occbyrace
	save `occbyrace'

* Template 2: race x south
* Same construction, with one row per south_merge value for each race code.

	use `fulldata', clear
	collapse (min) race, by(south_merge)
	expand 2
	bysort south_merge: replace race = _n
	isid south_merge race

	tempfile south
	save `south'

* Template 3: occupation x race x south
* Pair every occupation x race row with every south row of the same race.

	use `occbyrace', clear
	joinby race using `south'
	isid occ1950ej race south_merge

	tempfile template
	save `template'

* Put the full micro data back in memory for the collapse step.
	use `fulldata', clear


*******************
*** COLLAPSE 
*******************
	
	* Variable lists shared by the two collapses below.
	local cellvars   occ1950ej race south_merge
	local incomevars incwage fam_income hh_income
	local sizevars   number_children_HH number_children_fam number_people_HH number_people_fam

	* Version 1: no weights.
	*   number       - unweighted sum of the constant, i.e. observations per cell
	*   income vars  - cell means
	*   size vars    - cell medians
	collapse (rawsum) number           ///
	         (mean)   `incomevars'     ///
	         (p50)    `sizevars',      ///
	         by(`cellvars')

	tempfile income
	save `income'

	* Version 2: cell means weighted by number of children.
	* NOTE(review): the weight variable number_children is not one of the
	* child counts summarised in version 1; confirm it exists in the input file.
	use `fulldata', clear
	collapse (mean) `incomevars' [aw=number_children], by(`cellvars')

	* Suffix the weighted means so they can sit next to the unweighted ones.
	foreach v of local incomevars {
		rename `v' `v'_altwgt
	}

	tempfile income_alt
	save `income_alt'

* Merge into template
	* Load the occupation x race x south grid. The clear option is given so
	* this step does not depend on the data in memory having just been saved.
	use `template', clear

	* Unweighted statistics. Rows found only in the using file are kept.
	* Template cells with no observations get a count of zero.
	merge 1:1 occ1950ej race south_merge using `income', nogenerate
	replace number = 0 if missing(number)

	* Weighted means. Rows found only in the weighted file are not added.
	merge 1:1 occ1950ej race south_merge using `income_alt', nogenerate keep(master match)

* Check for missings: stop with an error if any cell lacks an income score
	assert !missing(incwage)

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*
**********
* Save
**********

	* Variable labels. They are attached under the current names and carry
	* over when the variables are renamed below.
	label variable incwage    "Coarse income score, incwage, by race and south"
	label variable hh_income  "Coarse income score, household income, by race and south"
	label variable fam_income "Coarse income score, family income, by race and south"

	* The weighted scores are referred to by their exact names (suffix
	* _altwgt), so this step does not rely on variable-name abbreviation.
	label variable incwage_altwgt    "Coarse income score, incwage, by race and south, alt weight"
	label variable hh_income_altwgt  "Coarse income score, household income, by race and south, alt weight"
	label variable fam_income_altwgt "Coarse income score, family income, by race and south, alt weight"

	label variable race   "Respondent race"
	label variable number "Number of obs in 1940 occ x race x region cell"

	label variable number_children_HH  "Median number of children in HH in this occ x race x region cell, 1940"
	label variable number_children_fam "Median number of children in fam. in this occ x race x region cell, 1940"
	label variable number_people_HH    "Median number of people in HH in this occ x race x region cell, 1940"
	label variable number_people_fam   "Median number of people in fam. in this occ x race x region cell, 1940"

	* Final variable names used in the output file.
	rename incwage    avgincwage_1940_byrace_bysouth
	rename fam_income avg_faminc_1940_byrace_bysouth
	rename hh_income  avg_HHinc_1940_byrace_bysouth

	rename incwage_altwgt    avgincwage_1940_byr_bys_altwgt
	rename fam_income_altwgt avg_faminc_1940_byr_bys_altwgt
	rename hh_income_altwgt  avg_HHinc_1940_byr_bys_altwgt

	rename occ1950ej fatheroccej
	rename number    number_1940obs_byrace_bysouth

	save ./output/avgincomes_fathers1940_byrace_bysouth.dta, replace
